{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57186378",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ch02-1-pandas-basic\n",
    "#  Overview of basic pandas functionality for manipulating data files and tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0bda3b91",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Libraries\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e82984b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read in the Vaccine data\n",
    "vdata = pd.read_csv(\"2021VAERSDATA.csv.gz\", encoding=\"iso-8859-1\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8acaabe3",
   "metadata": {},
   "outputs": [],
   "source": [
    "vdata.columns "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0f9e4a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "vdata.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ca72187",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the shape of your data\n",
    "vdata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "374fc081",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Access a pandas array using an integer-based location\n",
    "vdata.iloc[0] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1e0f929",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the index using a column.\n",
    "# Guarded so that re-running this cell is a no-op instead of a KeyError:\n",
    "# after the first run VAERS_ID is the index and no longer a column.\n",
    "if vdata.index.name != \"VAERS_ID\":\n",
    "    vdata = vdata.set_index(\"VAERS_ID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10cdfb94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the data using a key\n",
    "vdata.loc[916600] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e40a3bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use head to look at the top part of the data\n",
    "vdata.head(3) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7405daa6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Retrieve the first 3 rows using an array specification\n",
    "vdata.iloc[:3] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28dde50b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restrict the output to certain columns\n",
    "vdata.iloc[:5, 2:4] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93836659",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute the maximum age in the dataset\n",
    "vdata[\"AGE_YRS\"].max() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "813b90ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A different style of notation\n",
    "vdata.AGE_YRS.max() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39bc5c15",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the data\n",
    "vdata[\"AGE_YRS\"].sort_values().plot(use_index=False) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a686cf9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second plot\n",
    "vdata[\"AGE_YRS\"].plot.hist(bins=20) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "714251b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot using matplotlib: sorted ages (left) next to an age histogram (right).\n",
    "# sharey=True gives both panels a common age axis, hence the horizontal histogram.\n",
    "fig, ax = plt.subplots(1, 2, sharey=True)\n",
    "fig.suptitle(\"Age of adverse events\")\n",
    "vdata[\"AGE_YRS\"].sort_values().plot(use_index=False, ax=ax[0], xlabel=\"Observation\", ylabel=\"Age\")\n",
    "# Target the right-hand axes explicitly instead of relying on pyplot's current-axes state\n",
    "vdata[\"AGE_YRS\"].plot.hist(bins=20, orientation=\"horizontal\", ax=ax[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a9d6911",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count events per year of age.\n",
    "# Missing ages are dropped first; astype(int) then truncates fractional ages\n",
    "# to whole years in one vectorized step (same result as int(x) per element).\n",
    "vdata[\"AGE_YRS\"].dropna().astype(int).value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "663f65b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the number of people who died\n",
    "vdata.DIED.value_counts(dropna=False) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d88bec1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the is_dead column\n",
    "vdata[\"is_dead\"] = (vdata.DIED == \"Y\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7eecbe5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Associate data about deaths with vaccine involved\n",
    "dead = vdata[vdata.is_dead]\n",
    "vax = pd.read_csv(\"2021VAERSVAX.csv.gz\", encoding=\"iso-8859-1\").set_index(\"VAERS_ID\")\n",
    "# Only a cell's last expression is rendered, so show this summary explicitly\n",
    "display(vax.groupby(\"VAX_TYPE\").size().sort_values())\n",
    "vax19 = vax[vax.VAX_TYPE == \"COVID19\"]\n",
    "# Both frames are indexed by VAERS_ID. Inner join keeps only deaths that have a\n",
    "# matching COVID19 vaccine row; the default left join would also keep every other\n",
    "# death with NaN vaccine columns.\n",
    "vax19_dead = dead.join(vax19, how=\"inner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0020e32",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rank vaccine lots by the number of rows in vax19_dead, highest count first\n",
    "baddies = (\n",
    "    vax19_dead\n",
    "    .groupby(\"VAX_LOT\")\n",
    "    .size()\n",
    "    .sort_values(ascending=False)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f8f71be",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the top 10 lots: lot, row count, number of distinct states.\n",
    "# head(10) yields exactly ten entries (the old `i == 10` break printed eleven).\n",
    "# Distinct-state counts are computed once up front instead of re-filtering per lot.\n",
    "states_per_lot = vax19_dead.groupby(\"VAX_LOT\")[\"STATE\"].nunique()\n",
    "for lot, cnt in baddies.head(10).items():\n",
    "    print(lot, cnt, states_per_lot.loc[lot])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
